github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/interactive/examples/Interactive Beam Running on Flink.ipynb (about)

     1  {
     2   "cells": [
     3    {
     4     "cell_type": "markdown",
     5     "metadata": {},
     6     "source": [
     7      "<!--\n",
     8      "    Licensed to the Apache Software Foundation (ASF) under one\n",
     9      "    or more contributor license agreements.  See the NOTICE file\n",
    10      "    distributed with this work for additional information\n",
    11      "    regarding copyright ownership.  The ASF licenses this file\n",
    12      "    to you under the Apache License, Version 2.0 (the\n",
    13      "    \"License\"); you may not use this file except in compliance\n",
    14      "    with the License.  You may obtain a copy of the License at\n",
    15      "\n",
    16      "      http://www.apache.org/licenses/LICENSE-2.0\n",
    17      "\n",
    18      "    Unless required by applicable law or agreed to in writing,\n",
    19      "    software distributed under the License is distributed on an\n",
    20      "    \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
    21      "    KIND, either express or implied.  See the License for the\n",
    22      "    specific language governing permissions and limitations\n",
    23      "    under the License.\n",
    24      "-->\n",
    25      "\n",
    26      "# Interactive Beam Running on Flink"
    27     ]
    28    },
    29    {
    30     "cell_type": "code",
    31     "execution_count": null,
    32     "metadata": {},
    33     "outputs": [],
    34     "source": [
    35      "import apache_beam as beam\n",
    36      "from apache_beam.runners.interactive import interactive_runner\n",
    37      "from apache_beam.runners.portability import flink_runner\n",
    38      "\n",
    39      "p = beam.Pipeline(interactive_runner.InteractiveRunner(underlying_runner=flink_runner.FlinkRunner()))"
    40     ]
    41    },
    42    {
    43     "cell_type": "code",
    44     "execution_count": null,
    45     "metadata": {},
    46     "outputs": [],
    47     "source": [
    48      "init_pcoll = p | beam.Create(range(10))\n",
    49      "squares = init_pcoll | 'Square' >> beam.Map(lambda x: x*x)\n",
    50      "cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)\n",
    51      "result = p.run()\n",
    52      "result.wait_until_finish()"
    53     ]
    54    },
    55    {
    56     "cell_type": "code",
    57     "execution_count": null,
    58     "metadata": {},
    59     "outputs": [],
    60     "source": [
    61      "result.get(squares)"
    62     ]
    63    },
    64    {
    65     "cell_type": "code",
    66     "execution_count": null,
    67     "metadata": {},
    68     "outputs": [],
    69     "source": [
    70      "class AverageFn(beam.CombineFn):\n",
    71      "  def create_accumulator(self):\n",
    72      "    return (0.0, 0)\n",
    73      "\n",
    74      "  def add_input(self, sum_count, input):\n",
    75      "    (sum, count) = sum_count\n",
    76      "    return sum + input, count + 1\n",
    77      "\n",
    78      "  def merge_accumulators(self, accumulators):\n",
    79      "    sums, counts = zip(*accumulators)\n",
    80      "    return sum(sums), sum(counts)\n",
    81      "\n",
    82      "  def extract_output(self, sum_count):\n",
    83      "    (sum, count) = sum_count\n",
    84      "    return sum / count if count else float('NaN')"
    85     ]
    86    },
    87    {
    88     "cell_type": "code",
    89     "execution_count": null,
    90     "metadata": {},
    91     "outputs": [],
    92     "source": [
    93      "average_square = squares | 'Average Square' >> beam.CombineGlobally(AverageFn())\n",
    94      "average_cube = cubes | 'Average Cube' >> beam.CombineGlobally(AverageFn())\n",
    95      "result = p.run()"
    96     ]
    97    },
    98    {
    99     "cell_type": "code",
   100     "execution_count": null,
   101     "metadata": {},
   102     "outputs": [],
   103     "source": [
   104      "result.get(average_square)"
   105     ]
   106    }
   107   ],
   108   "metadata": {
   109    "language_info": {
   110     "codemirror_mode": {
   111      "name": "ipython",
   112      "version": 3
   113     },
   114     "file_extension": ".py",
   115     "mimetype": "text/x-python",
   116     "name": "python",
   117     "nbconvert_exporter": "python",
   118     "pygments_lexer": "ipython3",
   119     "version": "3.7.5rc1"
   120    }
   121   },
   122   "nbformat": 4,
   123   "nbformat_minor": 4
   124  }